# Polarisation experiment Data Analysis (experiment 1)
# Step 1: Clean Data
# Authors: Mark Kayser & Kasia Nalewajko
# Date: Feb 6, 2023 (THIS: Feb 17, 2023)

# Start from an empty workspace: remove every (non-hidden) object from the
# environment the script is run in. Note that this does not detach packages
# that are already attached, nor does it reset options or the working directory.
rm(list = ls())

# LOAD PACKAGES -----------------------------------------------------------

# Attach every package this script depends on, installing any that are missing
# first. require() merely returns FALSE when a package is absent, so a pattern
# of `if (!require(pkg)) install.packages(pkg)` installs the package without
# ever attaching it. library() is therefore called explicitly after the
# optional install; it also stops with an error if the install did not succeed.
required_packages <- c(
  "dplyr", "tidyr", "readxl", "stringr", "lubridate", "haven", "ggplot2"
)

for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# LOAD DATA ---------------------------------------------------------------

setwd("set/path/to/folder")

# Read the raw survey export and keep only the variables used in the analysis.
# Each entry below is `descriptive_name = raw_column_name`; select() renames
# while selecting, and the columns of `results` follow the order listed here.
results <- dplyr::select(
  read_excel("./01 data/01 raw data/study2/223416938_Hancza_2022_2.xlsx"),

  # experiment components
  demographic_qs_first = BLOK1,
  experiment_first = BLOK2,
  exp_condition = P15_LOS,
  outcome = P16A,
  manipulation_check = P17A,
  experiment_interpretation = P18,

  # general political attitudes
  LR = V99,
  int_security_most_important = V9909,

  # party identification
  presidential_first_round = P4,
  presidential_second_round = P5,
  parliamentary = P7,
  party_id = P8,
  party_id_strength = P9,
  PiS_or_PO = P10,
  PiS_or_PO_strength = P11,
  # from Graham & Svolik 2020 (henceforth: GS2020)
  political_ideology_GS2020 = P12,
  # GS2020 battery of 6 questions, version shown to PiS supporters
  party_id_PiS1_GS2020 = P13a_1,
  party_id_PiS2_GS2020 = P13a_2,
  party_id_PiS3_GS2020 = P13a_3,
  party_id_PiS4_GS2020 = P13a_4,
  party_id_PiS5_GS2020 = P13a_5,
  party_id_PiS6_GS2020 = P13a_6,
  # GS2020 battery of 6 questions, version shown to PO supporters
  party_id_PO1_GS2020 = P13b_1,
  party_id_PO2_GS2020 = P13b_2,
  party_id_PO3_GS2020 = P13b_3,
  party_id_PO4_GS2020 = P13b_4,
  party_id_PO5_GS2020 = P13b_5,
  party_id_PO6_GS2020 = P13b_6,

  # controls
  yob = xtqag,
  gender = xtqgen,
  locality_size = tqcla,
  voivodeship = tqvoi,
  education = tqedu,
  survey_date = StartDate
)

# Age relative to 2022 (presumably the survey year -- confirm), appended as the
# last column.
results$age <- 2022 - as.numeric(results$yob)

# CLEAN DATA ----

# ADJUST LEVELS SO THAT THEY MAKE SUBSTANTIVE SENSE ------
# Replace the treatment codes with descriptive labels:
#   1 -> "ChinaPO", 2 -> "ChinaPiS", 3 -> "China".
# Any other value is passed through unchanged, and missing values stay missing.
results$exp_condition <- ifelse(
  results$exp_condition == 1,
  "ChinaPO",
  ifelse(
    results$exp_condition == 2,
    "ChinaPiS",
    ifelse(results$exp_condition == 3, "China", results$exp_condition)
  )
)

# RECODE LEFT-RIGHT AND WHICH_FIRST MEASURES -----

results <- results %>%
  mutate(
    # Left-right placement: anything above 10 is set to missing (presumably
    # these are non-substantive codes such as "don't know" -- confirm against
    # the codebook).
    LR = ifelse(LR > 10, NA, LR),
    # Shift the block-order indicator down by one (presumably turning a 1/2
    # code into a 0/1 dummy -- confirm against the codebook).
    experiment_first = experiment_first - 1
  )

# CREATE A FACTOR "PO/PIS" VARIABLE WITH RELATED STRENGTH OF PO/PIS SUPPORT (coded 1-5) ----

# `popis` classifies each respondent as "PiS" or "PO" using a cascade of
# questions. Every step only fills in respondents who are still unclassified
# (NA), so an earlier question always takes precedence over a later one.

# Helper for one step of the cascade.
#   camp     - current "PiS" / "PO" / NA classification
#   answer   - responses to the question used in this step
#   pis_code - answer code that maps to "PiS"
#   po_code  - answer code that maps to "PO"
# Returns the updated classification; already-classified respondents are
# never overwritten.
assign_unclassified <- function(camp, answer, pis_code, po_code) {
  camp <- ifelse(answer == pis_code & is.na(camp), "PiS", camp)
  ifelse(answer == po_code & is.na(camp), "PO", camp)
}

# Step 1: what party do you identify with
results$popis <- ifelse(results$party_id == 3, "PiS", NA)
results$popis <- ifelse(results$party_id == 15, "PO", results$popis)

results %>%
  count(popis) # authors' note: 764 respondents identified

# Step 2: whom did you vote for in parliamentary elections in 2019
results$popis <- assign_unclassified(results$popis, results$parliamentary, 2, 5)

results %>%
  count(popis) # authors' note: further 213 respondents identified

# Step 3: whom did you vote for in the first round of 2020 presidential elections
results$popis <- assign_unclassified(
  results$popis, results$presidential_first_round, 1, 2
)

results %>%
  count(popis) # authors' note: further 158 respondents identified

# Step 4: whom did you vote for in the second round of 2020 presidential elections
results$popis <- assign_unclassified(
  results$popis, results$presidential_second_round, 1, 2
)

results %>%
  count(popis) # authors' note: further 171 respondents identified

# Flags for respondents still unclassified after steps 1-4. These must be
# computed BEFORE step 5 fills `popis` in, because they depend on it being NA.
# authors' note: 78 people were "forced to choose"
results$forced_POPiS_choice <- ifelse(
  is.na(results$popis) & results$PiS_or_PO < 3, 1, 0
)
# authors' note: 116 people didn't agree to choose
results$never_chooses_POPiS <- ifelse(
  is.na(results$popis) & results$PiS_or_PO == 999, 1, 0
)

# Step 5: if you had to choose...
results$popis <- assign_unclassified(results$popis, results$PiS_or_PO, 1, 2)

results$popis <- factor(results$popis)

results %>%
  count(popis)


# CALCULATE STRENGTH OF PARTISANSHIP (Graham and Svolik) -------

# Ideology item from the Graham & Svolik questionnaire, copied into
# `conservative` with every value above 7 set to missing.
results$conservative <- ifelse(
  results$political_ideology_GS2020 > 7,
  NA,
  results$political_ideology_GS2020
)

# Frequency table of the recoded item (printed for inspection).
results %>%
  count(conservative)

# Each battery item exists in a PiS-supporter column and a PO-supporter column.
# Merge them into a single column per item (GSparty_id1 ... GSparty_id6): take
# the PiS-version answer and fall back to the PO-version answer when the
# PiS-version is missing.
for (item in seq_len(6)) {
  pis_answer <- results[[paste0("party_id_PiS", item, "_GS2020")]]
  po_answer <- results[[paste0("party_id_PO", item, "_GS2020")]]
  results[[paste0("GSparty_id", item)]] <-
    ifelse(is.na(pis_answer), po_answer, pis_answer)
}

# Partisanship index = (item 2 + ... + item 6) - item 1.
# Missing answers among items 2-6 count as 0 (na.rm = TRUE); a missing item 1
# makes the whole index NA.
# The row sum is computed with vectorised rowSums() so that `results` is not
# turned into a row-wise data frame: a lingering rowwise() grouping silently
# changes how later mutate()/summarise() calls behave.
results <- results %>%
  mutate(
    GSparty_id = unname(rowSums(
      across(c(GSparty_id2, GSparty_id3, GSparty_id4, GSparty_id5, GSparty_id6)),
      na.rm = TRUE
    )) - GSparty_id1
  )

# Frequency table of the index (printed for inspection).
results %>%
  count(GSparty_id)

# Number of respondents without an index value.
sum(is.na(results$GSparty_id))

# CREATE NEGATIVE PARTISANSHIP BASED ON Graham and Svolik (Q4 + Q6) ------

# inspect
results$GSparty_id4
results$GSparty_id6 # the higher the score, the more hateful of the rival they are

# Negative partisanship = item 4 + item 6. If only one of the two items is
# missing it counts as 0 (na.rm = TRUE); respondents without a GSparty_id
# index are set to NA.
# ungroup() first drops any row-wise grouping still attached to `results`, and
# the sum itself is vectorised, so the data frame passed on to the following
# steps (and ultimately saved) is a plain tibble rather than a rowwise_df.
results <- results %>%
  ungroup() %>%
  mutate(
    GSnegative_partisan = rowSums(
      cbind(GSparty_id4, GSparty_id6),
      na.rm = TRUE
    ),
    GSnegative_partisan = ifelse(is.na(GSparty_id), NA, GSnegative_partisan)
  )

# STANDARDISE THE "STRENGTH OF PARTISANSHIP" MEASURES -------

# Standardise a numeric vector: subtract its mean and divide by its standard
# deviation.
#   x   - numeric vector
#   ... - forwarded unchanged to both mean() and sd() (e.g. na.rm = TRUE)
# Returns a vector of the same length as `x`.
zscore <- function(x, ...) {
  centre <- mean(x, ...)
  spread <- sd(x, ...)
  (x - centre) / spread
}

# Add a standardised "<name>_z" copy of each partisanship measure. Missing
# values are ignored when computing the mean and standard deviation.
# Plain column assignment is used (not mutate()) so the statistics are always
# computed over the whole column.
for (measure in c("conservative", "GSparty_id", "GSnegative_partisan")) {
  results[[paste0(measure, "_z")]] <- zscore(results[[measure]], na.rm = TRUE)
}

# CORRECT VARIABLE CLASSES FOR REGRESSIONS -------

# Categorical predictors become factors ...
for (factor_col in c("exp_condition", "voivodeship")) {
  results[[factor_col]] <- as.factor(results[[factor_col]])
}

# ... and the remaining controls are coerced to numeric.
for (numeric_col in c("yob", "locality_size", "education")) {
  results[[numeric_col]] <- as.numeric(results[[numeric_col]])
}

# RECODE GENDER VARIABLE -------

# Dummy: 1 when gender is coded "k", 0 for any other code, NA when missing.
results$female <- as.numeric(results$gender == "k")

# CREATE TWO SAMPLES "PO supporters" only AND "PiS supporters" only ------
# NOTE(review): the original header said these are saved separately, but the
# only object written to disk below is `results`; the two subsets exist in the
# workspace only. Confirm whether separate files are still wanted.

results_POsupporters <- dplyr::filter(results, popis == "PO")

results_PiSsupporters <- dplyr::filter(results, popis == "PiS")

# SAVE CLEAN DATA -------

# Write the full cleaned sample; load() restores it under the name `results`.
save(
  list = "results",
  file = "./01 data/02 generated data/study2/02results_fullsample_clean.Rda"
)




